Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)Libraries
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)Paths
metadata_path <-
"data/processed/metadata_ashton_desj_all_weavepop_H99.csv"
duplications_path <-
"results/tables/duplications.tsv"
merged_tree_path <-
"data/processed/tree_merged.newick"Include color palettes from R script
source("scripts/metadata_colors.R")Example of color palette objects
lineage_colors VNBI VNBII VNI VNII
"#1B9E77" "#D95F02" "#7570B3" "#E7298A"
Read tree
tree_original <- read.tree(merged_tree_path)Read and organize metadata.
To add metadata to a ggtree object (ggtree(tree) %<+% metadata), the first column of the metadata table must contain the labels in tree$tip.label.
metadata <- read.csv(
metadata_path,
header = TRUE) %>%
select(strain, sample, source, lineage, country_of_origin, dataset)
head(metadata)| strain | sample | source | lineage | country_of_origin | dataset |
|---|---|---|---|---|---|
| 20427_2#42 | ERS1142739 | Clinical | VNI | Vietnam | Ashton |
| 04CN-64-065 | ERS2540972 | Clinical | VNI | Uganda | Ashton |
| BMD915 | ERS2541156 | Clinical | VNI | Vietnam | Ashton |
| 14892_1#7 | ERS542302 | Clinical | VNI | Vietnam | Ashton |
| BMD942 | ERS2541105 | Clinical | VNI | Vietnam | Ashton |
| BMD2801 | ERS2541198 | Clinical | VNI | Vietnam | Ashton |
Remove tips that are not in metadata$strain
tree <- drop.tip(tree_original, setdiff(tree_original$tip.label, metadata$strain))Color the points of the tips with geom_tippoint() This works even when not all tips are in the metadata table, NA values are used when needed.
ggtree(tree_original, layout = "circular") %<+% metadata +
geom_tippoint(aes(color = lineage), size = 2)+
scale_color_manual(name="Lineage", values= lineage_colors)Add the tip labels (strain) and color them by a column in the metadata.
ggtree(tree, layout = "circular") %<+% metadata +
geom_tiplab(aes(color = source), size = 2, align = TRUE)+
scale_color_manual(name="Source", values= source_colors)To identify the most basal node of each lineage, get the Most Recent Common Ancestor of a pair of strains in the lineage, they have to be two strains whose MRCA is at the base of the clade. You can also use this to identify any clade of interest.
VNI_node <- getMRCA(tree, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(tree, c("C2","C12"))
VNBI_node <- getMRCA(tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(tree, c("MW-RSA3321","MW-RSA3179"))
nodes_lineages <- data.frame(
lineage = c("VNI", "VNII", "VNBI", "VNBII"),
mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)Get the node number of the MRCA of each VNI sublineage
VNIa4_node <- getMRCA(tree, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(tree, c("BMD852","04CN-64-014"))
VNIa93_node <- getMRCA(tree, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(tree, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(tree, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(tree, c("04CN-65-073","Bt138"))
VNIb_node <- getMRCA(tree, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(tree, c("Bt20","Bt11"))
nodes_sublineages <- data.frame(
sublineage = c(
"VNIa-4", "VNIa-5", "VNIa-93",
"VNIa-32", "VNIa-X", "VNIa-Y",
"VNIb", "VNIc"),
mrca = c(
VNIa4_node, VNIa5_node, VNIa93_node,
VNIa32_node, VNIaX_node, VNIaY_node,
VNIb_node, VNIc_node))To print the tree with all the node labels:
ggtree(tree, layout = "circular")+
geom_text(aes(label = node), size = 3, vjust = -0.5)ggtree(tree, layout = "circular") +
geom_cladelab(data = nodes_lineages,
mapping = aes(node = mrca, label = lineage, color = lineage))+
scale_color_manual(name="Lineage", values= lineage_colors)ggtree(tree, layout = "circular") +
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 4, , fontface = "bold",
hjust = 1.25, vjust = -0.5)To color the branches of clades, create groups in the tree object with groupClade().
tree <- groupClade(tree, setNames(nodes_sublineages$mrca, nodes_sublineages$sublineage))And color the tree by aes(color = group). Use scale_color_manual() with a palette to specify the colors of the clades and avoid coloring the branches that are not part of the selected clades.
ggtree(tree, aes(color = group), layout = "circular",
size = 1) +
scale_color_manual(
name = "Sublineage",
values = sublineage_colors
)ggtree(tree, aes(color = group),
layout = "circular", size = 1) %<+% metadata +
scale_color_manual(
name = "Sublineage",
values = sublineage_colors)+
new_scale_color()+
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 4, , fontface = "bold",
hjust = 1.25, vjust = -0.5)+
geom_tippoint(aes(color = source), size = 1)+
scale_color_manual(name = "Source", values = source_colors)ggtree(tree, layout = "circular") %<+% metadata +
geom_cladelab(data = nodes_sublineages,
mapping = aes(node = mrca, label = sublineage, color = sublineage),
align = TRUE, face = "bold")+
scale_color_manual(name="Sublineage", values= sublineage_colors)+
geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]),
size = 6, , fontface = "bold",
hjust = 1.25, vjust = -0.5)+
new_scale_color() +
geom_tippoint(aes(color = source), size = 2)+
scale_color_manual(name = "Source", values = source_colors)ggtree(tree, layout = "circular", size = 1, branch.length = "none") %<+% metadata +
geom_hilight(data=nodes_sublineages, aes(node=mrca, fill=sublineage), alpha = 0.5)+
scale_fill_brewer(name = "Lineage", palette = "Dark2")+
new_scale_fill()+
geom_cladelab(data = nodes_sublineages,
mapping = aes(node = mrca, label = sublineage),
align = TRUE, face = "bold",
angle = "auto")Create a dataframe where the rownames are the tree’s tip labels, each column is a column of the heatmap and the values are the colors in the heatmap. If using categorical values for the heatmap and their order in the legend is important, convert the column to factor and set the order of the levels.
country <- metadata %>%
select(strain, country_of_origin)%>%
column_to_rownames("strain")
country$country_of_origin <- factor(country$country_of_origin,
levels = names(country_colors))To add a heamap, the function gheatmap() takes an object made with ggtree() and add the dataframe with the matrix to use in the heatmap. Use the offset option to separate the heatmap from the tree. Use the guides() function to order the legends. Use the option limits in scale_fill_manual to set the order of the legend labels to match the order of the levels in the factor.
p <- ggtree(tree, branch.length = "none") %<+% metadata
gheatmap(p, country, width=0.1, colnames = FALSE,
colnames_position = "top",
offset=7,
font.size = 6) +
geom_tiplab(aes(color = lineage), size = 3)+
scale_color_manual(name = "Lineage", values = lineage_colors)+
scale_fill_manual(values=country_colors,
name="Country", na.translate=FALSE,
limits = levels(country$country_of_origin))+
guides(color = guide_legend(order = 1), fill = guide_legend(order = 2))+
theme(legend.position = "right")